Part of the possible emulator accuracy issues could be satellite fraction issues. Gonna look at those explicitly.



In [161]:

    
from pearce.emulator import SpicyBuffalo, LemonPepperWet, OriginalRecipe
from pearce.mocks import cat_dict
import numpy as np
from os import path



In [162]:

    
import matplotlib
#matplotlib.use('Agg')
from matplotlib import pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set()



In [163]:

    
#xi gg
training_file = '/scratch/users/swmclau2/xi_zheng07_cosmo_lowmsat/PearceRedMagicXiCosmoFixedNd.hdf5'
#test_file= '/scratch/users/swmclau2/xi_zheng07_cosmo_test_lowmsat2/'
test_file =  '/scratch/users/swmclau2/xi_zheng07_cosmo_test_lowmsat2/PearceRedMagicXiCosmoFixedNd_Test.hdf5'

#xi gm training_file = '/scratch/users/swmclau2/xi_gm_cosmo/PearceRedMagicXiGMCosmoFixedNd.hdf5' test_file = '/scratch/users/swmclau2/xi_gm_cosmo_test2/PearceRedMagicXiGMCosmoFixedNdTest.hdf5'



In [164]:

    
em_method = 'gp'
split_method = 'random'



In [165]:

    
a = 1.0
z = 1.0/a - 1.0



In [166]:

    
scale_bin_centers = np.array([  0.09581734,   0.13534558,   0.19118072,   0.27004994,
                             0.38145568,   0.53882047,   0.76110414,   1.07508818,
                             1.51860241,   2.14508292,   3.03001016,   4.28000311,
                             6.04566509,   8.53972892,  12.06268772,  17.0389993 ,
                            24.06822623,  33.99727318])



In [167]:

    
bin_idx = 1
fixed_params = {'z':z, 'r': scale_bin_centers[bin_idx]}#, 'cosmo': 0}#, 'r':24.06822623}



In [168]:

    
np.random.seed(0)
emu = OriginalRecipe(training_file, method = em_method, fixed_params=fixed_params,
                 custom_mean_function = 'linear', downsample_factor = 0.1)



In [169]:

    
emu.scale_bin_centers









    Out[169]:





array([  0.09581734,   0.13534558,   0.19118072,   0.27004994,
         0.38145568,   0.53882047,   0.76110414,   1.07508818,
         1.51860241,   2.14508292,   3.03001016,   4.28000311,
         6.04566509,   8.53972892,  12.06268772,  17.0389993 ,
        24.06822623,  33.99727318])



In [170]:

    
pred_y, data_y = emu.goodness_of_fit(test_file, statistic = None)



In [171]:

    
test_x, test_y, test_cov, _ = emu.get_data(test_file, emu.fixed_params)

t, old_idxs  = emu._whiten(test_x)



In [172]:

    
resmat_flat = 10**pred_y - 10**data_y
datamat_flat = 10**data_y



In [173]:

    
t_bin = t
acc_bin = np.abs(resmat_flat)/datamat_flat



In [174]:

    
from pearce.mocks.kittens import TrainingBox



In [175]:

    
boxno = 0
cat = TrainingBox(boxno, system = 'sherlock')



In [176]:

    
cat.load(a, HOD='zheng07')



In [ ]:

    
nd = 1e-4



In [ ]:

    
hod_pnames = emu.get_param_names()[7:]
mf = cat.calc_mf()



In [ ]:

    
for pname in hod_pnames:
    print pname, emu.get_param_bounds(pname)



In [ ]:

    
from scipy.optimize import minimize_scalar
def add_logMmin(hod_params, cat):
    """
    In the fixed number density case, find the logMmin value that will match the nd given hod_params
    :param: hod_params:
        The other parameters besides logMmin
    :param cat:
        the catalog in question
    :return:
        None. hod_params will have logMmin added to it.
    """
    hod_params['logMmin'] = 13.0 #initial guess
    #cat.populate(hod_params) #may be overkill, but will ensure params are written everywhere
    def func(logMmin, hod_params):
        hod_params.update({'logMmin':logMmin})
        return (cat.calc_analytic_nd(hod_params) - nd)**2

    res = minimize_scalar(func, bounds = (12.0, 16.0), args = (hod_params,), options = {'maxiter':100},\
                          method = 'Bounded')

    # assuming this doens't fail
    #print 'logMmin', res.x
    hod_params['logMmin'] = res.x
    #print hod_params



In [ ]:

    
sat_fracs = np.zeros((1000,))
sat_nd = np.zeros((1000,))
actual_nd = np.zeros_like(sat_fracs)
log_mMins = np.zeros_like(sat_fracs)

for idx, x in enumerate(test_x[:1000, 7:]):
    hod_params = dict(zip(hod_pnames, x))
    add_logMmin(hod_params, cat)
    log_mMins[idx] = hod_params['logMmin']
    sat_hod = cat.calc_hod(hod_params, component='satellite')
    sat_nd[idx] = np.sum(mf*sat_hod)/((cat.Lbox/cat.h)**3)
    #sat_fracs[idx] = sat_nd/nd
    actual_nd[idx] = cat.calc_analytic_nd(hod_params)
    
sat_fracs = sat_nd/actual_nd



In [ ]:

    
plt.hist(sat_fracs)



In [ ]:

    
sat_fracs.mean()



In [ ]:

    
sat_fracs.std()



In [ ]:

    
plt.hist(log_mMins)



In [ ]:

    
hod_pnames



In [ ]:

    
plt.scatter(test_x[:1000, 9], acc_bin[:1000])



In [ ]:

    
test_x[:5000,0]



In [ ]:

    
pnames = emu.get_param_names()
for i in xrange(7):
    for j in xrange(7):
        mean_acc = np.mean(acc_bin[j*5000:(j+1)*5000])
        plt.scatter(test_x[j*5000, i], mean_acc, label = 'Cosmo %d'%j)
    plt.xlabel(pnames[i])
    plt.ylabel('Avg. Percent Accurate')
    plt.title('r = %.2f'%scale_bin_centers[bin_idx])

    plt.legend(loc = 'best')
    plt.show()



In [ ]:

    
test_x[0*35::1000, 9]



In [ ]:

    
pnames = emu.get_param_names()
for i in xrange(7,11):
    for j in xrange(0,1000):
        mean_acc = np.mean(acc_bin[j::1000])
        plt.scatter(test_x[j, i], mean_acc, label = 'HOD %d'%j, alpha = 0.6)
    plt.xlabel(pnames[i])
    plt.ylabel('Avg. Percent Accurate')
    plt.title('r = %.2f'%scale_bin_centers[bin_idx])
    #plt.legend(loc = 'best')
    plt.show()



In [ ]:

    
mcut = 13.5
sub_test_idx = np.logical_and(test_x[:, 9]>mcut, test_x[:, 7] < mcut)
print np.mean(acc_bin[sub_test_idx]), np.sum(sub_test_idx)



In [ ]:

    
plt.scatter(test_x[:1000, 9], sat_fracs)
plt.xlabel('logM1')
plt.ylabel('Sat Frac')



In [ ]:

    
plt.scatter(test_x[:1000, 9], log_mMins)
plt.xlabel('logM1')
plt.ylabel('logMmin')



In [ ]:

    
plt.hist(1e4*(actual_nd-nd) )



In [ ]:

    
plt.scatter(test_x[:1000, 9], 1e4*(actual_nd-nd) )
plt.xlabel('logM1')
plt.ylabel('Actual nd - Fixed nd')



In [ ]:

    
good_nd_idxs = np.isclose(actual_nd, nd)
print np.sum(good_nd_idxs)/1000.



In [ ]:

    
# NOTE sat_fracs uses actual_nd, so these is a weird selection
good_satfrac_idxs = np.logical_and(0.1 < sat_fracs, sat_fracs < 0.5)
print np.sum(good_satfrac_idxs)/1000.



In [ ]:

    
print np.sum(np.logical_and(good_satfrac_idxs, good_nd_idxs))/1000.



In [ ]: